{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Brand Name</th>\n",
       "      <th>Price</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Reviews</th>\n",
       "      <th>Review Votes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>5</td>\n",
       "      <td>I feel so LUCKY to have found this used (phone...</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>4</td>\n",
       "      <td>nice phone, nice up grade from my pantach revu...</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>5</td>\n",
       "      <td>Very pleased</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>4</td>\n",
       "      <td>It works good but it goes slow sometimes but i...</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>4</td>\n",
       "      <td>Great phone to replace my lost phone. The only...</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        Product Name Brand Name   Price  \\\n",
       "0  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "1  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "2  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "3  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "4  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "\n",
       "   Rating                                            Reviews  Review Votes  \n",
       "0       5  I feel so LUCKY to have found this used (phone...           1.0  \n",
       "1       4  nice phone, nice up grade from my pantach revu...           0.0  \n",
       "2       5                                       Very pleased           0.0  \n",
       "3       4  It works good but it goes slow sometimes but i...           0.0  \n",
       "4       4  Great phone to replace my lost phone. The only...           0.0  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "df = pd.read_csv('Amazon_Unlocked_Mobile.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Brand Name</th>\n",
       "      <th>Price</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Reviews</th>\n",
       "      <th>Review Votes</th>\n",
       "      <th>Positively Rated</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>5</td>\n",
       "      <td>I feel so LUCKY to have found this used (phone...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>4</td>\n",
       "      <td>nice phone, nice up grade from my pantach revu...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>5</td>\n",
       "      <td>Very pleased</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>4</td>\n",
       "      <td>It works good but it goes slow sometimes but i...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>4</td>\n",
       "      <td>Great phone to replace my lost phone. The only...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>1</td>\n",
       "      <td>I already had a phone with problems... I know ...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>2</td>\n",
       "      <td>The charging port was loose. I got that solder...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>2</td>\n",
       "      <td>Phone looks good but wouldn't stay charged, ha...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>5</td>\n",
       "      <td>I originally was using the Samsung S2 Galaxy f...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
       "      <td>Samsung</td>\n",
       "      <td>199.99</td>\n",
       "      <td>3</td>\n",
       "      <td>It's battery life is great. It's very responsi...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        Product Name Brand Name   Price  \\\n",
       "0  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "1  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "2  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "3  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "4  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "5  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "6  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "7  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "8  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "9  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
       "\n",
       "   Rating                                            Reviews  Review Votes  \\\n",
       "0       5  I feel so LUCKY to have found this used (phone...           1.0   \n",
       "1       4  nice phone, nice up grade from my pantach revu...           0.0   \n",
       "2       5                                       Very pleased           0.0   \n",
       "3       4  It works good but it goes slow sometimes but i...           0.0   \n",
       "4       4  Great phone to replace my lost phone. The only...           0.0   \n",
       "5       1  I already had a phone with problems... I know ...           1.0   \n",
       "6       2  The charging port was loose. I got that solder...           0.0   \n",
       "7       2  Phone looks good but wouldn't stay charged, ha...           0.0   \n",
       "8       5  I originally was using the Samsung S2 Galaxy f...           0.0   \n",
       "9       3  It's battery life is great. It's very responsi...           0.0   \n",
       "\n",
       "   Positively Rated  \n",
       "0                 1  \n",
       "1                 1  \n",
       "2                 1  \n",
       "3                 1  \n",
       "4                 1  \n",
       "5                 0  \n",
       "6                 0  \n",
       "7                 0  \n",
       "8                 1  \n",
       "9                 0  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna(inplace=True)\n",
    "df[df['Rating'] != 3]\n",
    "df['Positively Rated'] = np.where(df['Rating'] > 3, 1, 0)\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6899487041440472"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Positively Rated'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(df['Reviews'], df['Positively Rated'], random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X_train first entry: \n",
      "\n",
      " I feel so LUCKY to have found this used (phone to us & not used hard at all), phone on line from someone who upgraded and sold this one. My Son liked his old one that finally fell apart after 2.5+ years and didn't want an upgrade!! Thank you Seller, we really appreciate it & your honesty re: said used phone.I recommend this seller very highly & would but from them again!!\n",
      "\n",
      "\n",
      "X_train shape:  (250751,)\n"
     ]
    }
   ],
   "source": [
    "print('X_train first entry: \\n\\n', X_train[0])\n",
    "print('\\n\\nX_train shape: ', X_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# CountVectorizer\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "\n",
    "vect = CountVectorizer().fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['00',\n",
       " '858',\n",
       " 'approval',\n",
       " 'booth',\n",
       " 'cmon',\n",
       " 'dealsthanks',\n",
       " 'eclair',\n",
       " 'ff',\n",
       " 'gsmpros',\n",
       " 'insertion',\n",
       " 'linkhttps',\n",
       " 'movment',\n",
       " 'outmoded',\n",
       " 'preset',\n",
       " 'reinstallation',\n",
       " 'separatingly',\n",
       " 'stillnumbers',\n",
       " 'todos',\n",
       " 'verycool']"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vect.get_feature_names()[::3000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "56947"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(vect.get_feature_names())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<250751x56947 sparse matrix of type '<class 'numpy.int64'>'\n",
       "\twith 6848632 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# transform the documents in the training data to a document-term matrix\n",
    "X_train_vectorized = vect.transform(X_train)\n",
    "X_train_vectorized"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
       "          verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train_vectorized, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AUC:  0.896300276575\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "predictions = model.predict(vect.transform(X_test))\n",
    "\n",
    "print('AUC: ', roc_auc_score(y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Smallest Coefs: \n",
      "['raymond' 'stylist' 'mony' 'false' 'worst' 'unsatisfied' 'disconnects'\n",
      " 'pos' 'horribly' 'lies']\n",
      "\n",
      "Largest Coefs: \n",
      "['excelent' 'excelente' '4eeeks' 'perfecto' 'exelente' 'loving' 'awsome'\n",
      " 'excellent' 'becuse' 'sweet']\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# get the feature names as numpy array\n",
    "feature_names = np.array(vect.get_feature_names())\n",
    "\n",
    "# Sort the coefficients from the model\n",
    "sorted_coef_index = model.coef_[0].argsort()\n",
    "\n",
    "# Find the 10 smallest and 10 largest coefficients\n",
    "# The 10 largest coefficients are being indexed using [:-11:-1] \n",
    "# so the list returned is in order of largest to smallest\n",
    "print('Smallest Coefs: \\n{}\\n'.format(feature_names[sorted_coef_index[:10]]))\n",
    "print('Largest Coefs: \\n{}\\n'.format(feature_names[sorted_coef_index[:-11:-1]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18951"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Tfidf\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "# Fit the TfidfVectorizer to the training data specifiying a minimum document frequency of 5\n",
    "vect = TfidfVectorizer(min_df = 5).fit(X_train)\n",
    "len(vect.get_feature_names())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AUC:  0.900231756945\n"
     ]
    }
   ],
   "source": [
    "X_train_vectorized = vect.transform(X_train)\n",
    "\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train_vectorized, y_train)\n",
    "predictions = model.predict(vect.transform(X_test))\n",
    "print('AUC: ', roc_auc_score(y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Smallest Tfidf: \n",
      "['brawns' 'messiah' 'excites' 'reading___' 'seizing' 'srgb' 'liquidating'\n",
      " '16nm' '1b' '700nits']\n",
      "\n",
      "Largest Tfidf: \n",
      "['excllent' 'ecxelente' 'purchase' 'eh' 'gud' 'looser' 'gucci' 'soo'\n",
      " 'unusable' 'lost']\n",
      "\n"
     ]
    }
   ],
   "source": [
    "feature_names = np.array(vect.get_feature_names())\n",
    "\n",
    "sorted_tfidf_index = X_train_vectorized.max(0).toarray()[0].argsort()\n",
    "\n",
    "print('Smallest Tfidf: \\n{}\\n'.format(feature_names[sorted_tfidf_index[:10]]))\n",
    "print('Largest Tfidf: \\n{}\\n'.format(feature_names[sorted_tfidf_index[:-11:-1]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Smallest coef: \n",
      "['not' 'worst' 'disappointed' 'waste' 'poor' 'terrible' 'return' 'stopped'\n",
      " 'slow' 'returning']\n",
      "\n",
      "Largest coef: \n",
      "['love' 'great' 'amazing' 'excellent' 'perfect' 'loves' 'best' 'awesome'\n",
      " 'perfectly' 'easy']\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sorted_coef_index = model.coef_[0].argsort()\n",
    "\n",
    "print('Smallest coef: \\n{}\\n'.format(feature_names[sorted_coef_index[:10]]))\n",
    "print('Largest coef: \\n{}\\n'.format(feature_names[sorted_coef_index[:-11:-1]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0]\n"
     ]
    }
   ],
   "source": [
    "# These reviews are treated the same by our current model\n",
    "\n",
    "print(model.predict(vect.transform(['Not an issue, phone is working', \n",
    "                                   'an issue, phone is not working'])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "217383"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# n-grams\n",
    "# Fit the CountVectorizer to the training data specifiying a minimum \n",
    "# document frequency of 5 and extracting 1-grams and 2-grams\n",
    "vect = CountVectorizer(min_df = 5, ngram_range = (1,2)).fit(X_train)\n",
    "X_train_vectorized = vect.transform(X_train)\n",
    "len(vect.get_feature_names())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AUC:  0.948236892649\n"
     ]
    }
   ],
   "source": [
    "model = LogisticRegression()\n",
    "model.fit(X_train_vectorized, y_train)\n",
    "\n",
    "predictions = model.predict(vect.transform(X_test))\n",
    "print('AUC: ', roc_auc_score(y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Smallest Coef: \n",
      "['junk' 'no good' 'worst' 'good love' 'horrible' 'nope' 'terrible'\n",
      " 'needed good' 'three stars' 'not happy']\n",
      "\n",
      "Largest Coef: \n",
      "['excelent' 'excelente' 'excellent' 'perfect' 'no issues' 'no problems'\n",
      " 'perfecto' 'exelente' 'awesome' 'awsome']\n",
      "\n"
     ]
    }
   ],
   "source": [
    "feature_names = np.array(vect.get_feature_names())\n",
    "sorted_coef_index = model.coef_[0].argsort()\n",
    "\n",
    "print('Smallest Coef: \\n{}\\n'.format(feature_names[sorted_coef_index][:10]))\n",
    "print('Largest Coef: \\n{}\\n'.format(feature_names[sorted_coef_index][:-11:-1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0]\n"
     ]
    }
   ],
   "source": [
    "print(model.predict(vect.transform(['not an issue, phone is working',\n",
    "                                   'an issue, phone is not working'])))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
